knitr::opts_chunk$set(root.dir = "/Users/gaby/Downloads/ex")
library(foreign)
library(tidyverse) # a set of different packages
## ââ Attaching core tidyverse packages ââââââââââââââââââââââââ tidyverse 2.0.0 ââ
## â dplyr 1.1.4 â readr 2.1.5
## â forcats 1.0.0 â stringr 1.5.0
## â ggplot2 3.4.4 â tibble 3.2.1
## â lubridate 1.9.3 â tidyr 1.3.0
## â purrr 1.0.1
## ââ Conflicts ââââââââââââââââââââââââââââââââââââââââââ tidyverse_conflicts() ââ
## â dplyr::filter() masks stats::filter()
## â dplyr::lag() masks stats::lag()
## âč Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(TSstudio) # provides a set of tools for descriptive and predictive analysis of time series data
library(forecast) # provides methods and tools for displaying and analyzing univariate time series forecasts
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(corrplot) # provides a visual exploratory tool on correlation matrix
## corrplot 0.92 loaded
library(ggplot2) # dedicated to data visualization
library(tseries) # time series analysis
library(mFilter) # implements several time series filters useful for smoothing and extracting trend and cyclical
library(dygraphs) # an interface that provides rich facilities for charing time - series
library(stats) # functions for statistical calculations and random number generation
library(astsa) # applied statistical time series analysis
##
## Attaching package: 'astsa'
##
## The following object is masked from 'package:forecast':
##
## gas
library(xts) # convert objects to time series format
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
library(zoo) # displays time series of numeric vectors / matrices / factors
library(AER) # applied econometrics with R
## Warning: package 'AER' was built under R version 4.3.2
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
##
## Loading required package: lmtest
## Loading required package: sandwich
## Loading required package: survival
library(plm) # linear models for panel data
##
## Attaching package: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
library(vars) # estimation, lag selection, diagnostic testing, forecasting, causality analysis and estimation of
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: strucchange
##
## Attaching package: 'strucchange'
##
## The following object is masked from 'package:stringr':
##
## boundary
##
## Loading required package: urca
library(dynlm) # dynamic linear models and time series regression
library(dplyr) # useful package / tool for working with a data frame. It focuses on data manipulation
library(panelvar) # provides a comprehensive framework for panel vector autoregression models
## Welcome to panelvar! Please cite our package in your publications -- see citation("panelvar")
##
## Attaching package: 'panelvar'
##
## The following object is masked from 'package:vars':
##
## stability
##
## The following object is masked from 'package:tidyr':
##
## extract
library(foreign)
library(RColorBrewer) # offers color palettes
library(lmtest) # testing linear regression models
library(stargazer) # creates well formatted regression tables
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(tseries) # time series analysis
SECCIĂN I En este proyecto se trabajarĂĄ con una base de datos panel relacionada con los precios de casas en los diferentes estados de Estados Unidos. Se buscarĂĄ encontrar un modelo que pueda predecir los precios considerando los factores de entidad y de tiempo.
setwd("/Users/gaby/Downloads/ex")
houses <- read_csv("House_Price_USA.csv")
## Rows: 1421 Columns: 10
## ââ Column specification ââââââââââââââââââââââââââââââââââââââââââââââââââââââââ
## Delimiter: ","
## chr (3): names, plate, region.name
## dbl (7): state, year, region, price, income, pop, intrate
##
## âč Use `spec()` to retrieve the full column specification for this data.
## âč Specify the column types or set `show_col_types = FALSE` to quiet this message.
femsa<-read.csv("FEMSAUBD.csv")
femsa2024<-read.csv("FEMSAUBD.MX.csv")
Con el uso de la función summary se puede concluir que estamos trabajando con una base de datos panel balanceada. Asimismo, es posible encontrar las medidas de tendencia central. Se puede observar que el precio promedio es de 99.89, el ingreso promedio es de 9.9, la población promedio es de 5076286, y la tasa de interés promedio es de 4.3.
summary(houses)
## state year names plate
## Min. : 1.0 Min. :1975 Length:1421 Length:1421
## 1st Qu.:18.0 1st Qu.:1982 Class :character Class :character
## Median :30.0 Median :1989 Mode :character Mode :character
## Mean :29.8 Mean :1989
## 3rd Qu.:42.0 3rd Qu.:1996
## Max. :56.0 Max. :2003
## region region.name price income
## Min. :1.000 Length:1421 Min. : 58.09 Min. : 5.910
## 1st Qu.:3.000 Class :character 1st Qu.: 87.55 1st Qu.: 8.677
## Median :5.000 Mode :character Median : 96.87 Median : 9.718
## Mean :4.327 Mean : 99.89 Mean : 9.933
## 3rd Qu.:6.000 3rd Qu.:108.06 3rd Qu.:11.099
## Max. :8.000 Max. :224.12 Max. :18.219
## pop intrate
## Min. : 380477 Min. :-5.544
## 1st Qu.: 1472595 1st Qu.: 3.419
## Median : 3495939 Median : 4.572
## Mean : 5076286 Mean : 4.363
## 3rd Qu.: 5840774 3rd Qu.: 5.687
## Max. :35484453 Max. :11.225
regions <- unique(houses$region.name)
print(regions)
## [1] "Plains" "Southwest" "Far West" "Rocky Mountain"
## [5] "New England" "Mideast" "Southeast" "Great Lakes"
Se convertirån las regiones a valores numéricos
houses$region.name[houses$region.name ==1] ="Plains"
houses$region.name[houses$region.name ==2] ="Southwest"
houses$region.name[houses$region.name ==3] ="Far West"
houses$region.name[houses$region.name ==4] ="Rocky Mountain"
houses$region.name[houses$region.name ==5] ="New England"
houses$region.name[houses$region.name ==6] ="Mideast"
houses$region.name[houses$region.name ==7] ="Southeast"
houses$region.name[houses$region.name ==8] ="Great Lakes"
Se transforma la variable de regiĂłn a categĂłrica
houses$region.name <- as.factor(houses$region.name)
houses$names <- as.factor(houses$names)
summary(houses)
## state year names plate
## Min. : 1.0 Min. :1975 Alabama : 29 Length:1421
## 1st Qu.:18.0 1st Qu.:1982 Arizona : 29 Class :character
## Median :30.0 Median :1989 Arkansas : 29 Mode :character
## Mean :29.8 Mean :1989 California : 29
## 3rd Qu.:42.0 3rd Qu.:1996 Colorado : 29
## Max. :56.0 Max. :2003 Connecticut: 29
## (Other) :1247
## region region.name price income
## Min. :1.000 Plains :348 Min. : 58.09 Min. : 5.910
## 1st Qu.:3.000 Great Lakes :203 1st Qu.: 87.55 1st Qu.: 8.677
## Median :5.000 Mideast :174 Median : 96.87 Median : 9.718
## Mean :4.327 New England :174 Mean : 99.89 Mean : 9.933
## 3rd Qu.:6.000 Rocky Mountain:145 3rd Qu.:108.06 3rd Qu.:11.099
## Max. :8.000 Southeast :145 Max. :224.12 Max. :18.219
## (Other) :232
## pop intrate
## Min. : 380477 Min. :-5.544
## 1st Qu.: 1472595 1st Qu.: 3.419
## Median : 3495939 Median : 4.572
## Mean : 5076286 Mean : 4.363
## 3rd Qu.: 5840774 3rd Qu.: 5.687
## Max. :35484453 Max. :11.225
##
A travĂ©s de las medidas de dispersiĂłn se puede observar que la poblaciĂłn tiene una mayor dispersiĂłn de datos. A travĂ©s de las medidas de rango de las diferentes variables es posible observar que los datos no tienen una gran dispersiĂłn y en realidad estĂĄn dentro de un rango pequeño. Al comparar esto con las medidas de tendencia central tambiĂ©n se puede inferir que habrĂĄn datos atĂpicos debido a que el rango IQR es pequeño pero hay variables con una diferencia del mĂĄximo y el mĂnimo relativamente grande.
varianza<-var(houses$price)
ds<-sd(houses$price)
rango<-diff(range(houses$price))
iqr<-IQR(houses$price)
print(varianza)
## [1] 386.3875
print(ds)
## [1] 19.65674
print(rango)
## [1] 166.0264
print(iqr)
## [1] 20.50941
varianza<-var(houses$income)
ds<-sd(houses$income)
rango<-diff(range(houses$income))
iqr<-IQR(houses$income)
print(varianza)
## [1] 3.111296
print(ds)
## [1] 1.763887
print(rango)
## [1] 12.30894
print(iqr)
## [1] 2.421148
varianza<-var(houses$pop)
ds<-sd(houses$pop)
rango<-diff(range(houses$pop))
iqr<-IQR(houses$pop)
print(varianza)
## [1] 2.945701e+13
print(ds)
## [1] 5427432
print(rango)
## [1] 35103976
print(iqr)
## [1] 4368179
varianza<-var(houses$intrate)
ds<-sd(houses$intrate)
rango<-diff(range(houses$intrate))
iqr<-IQR(houses$intrate)
print(varianza)
## [1] 6.745633
print(ds)
## [1] 2.597236
print(rango)
## [1] 16.76963
print(iqr)
## [1] 2.268536
Se puede observar que las variables tienen sesgos, especĂficamente la de la poblaciĂłn. Las otras variables, a pesar de tener una distribuciĂłn relativamente normal, tambiĂ©n tienen sesgos que pueden afectar el impacto de las variables en el modelo final.
hist(houses$price)
hist(houses$income)
hist(houses$pop)
hist(houses$intrate)
A travĂ©s del diagrama de caja, se pueden observar datos atĂpicos, lo que se puede relacionar con los sesgos vistos en el histograma. De cualquier manera, a travĂ©s de la caja se puede observar que la mayorĂa de los datos estĂĄn concentrados dentro del rango.
boxplot(houses$price)
boxplot(houses$income)
boxplot(houses$pop)
boxplot(houses$intrate)
A través del mapa se puede observar el precio promedio de casas por
estado. Visualmente no se puede observar un impacto significativo por
regiones ya sea en el centro, o cerca de la frontera, o mĂĄs.
library(dplyr)
library(usmap)
library(usmapdata)
## Warning: package 'usmapdata' was built under R version 4.3.2
##
## Attaching package: 'usmapdata'
## The following object is masked from 'package:usmap':
##
## us_map
houses_state <- houses %>% group_by(state) %>% mutate(price = mean(price)) %>% arrange(price)
houses_state$fips <- sprintf("%02d", houses_state$state)
state_id <- usmapdata::fips_data("state")
houses_pd_join <- merge(houses_state, state_id, by.x = "fips", by.y = "fips")
plot_usmap(data = houses_pd_join, values = "price")
A través del gråfico de la serie de tiempo de los precios de hogares se puede observar que la serie no es estacionaria y un gran declive de precios a partir de 1978.
ts_prices<- ts(houses$price, start = c(1975), end = c(2003), frequency = 1)
ts_plot(ts_prices, line.mode = "lines+markers", dash = "dash", Xtitle = "Year", Ytitle = "Dollars", title = "House prices")
Se hizo la prueba de Dickey Fuller para poder confirmar que la serie no es estacionaria. Debido a que el valor de p es mayor a 0.05 no serĂa considerada estacionaria.
adf.test(ts_prices,alternative="stationary")
##
## Augmented Dickey-Fuller Test
##
## data: ts_prices
## Dickey-Fuller = -2.2381, Lag order = 3, p-value = 0.4813
## alternative hypothesis: stationary
Se generĂł un modelo lineal base tomando a consideraciĂłn todas las variables incluidas en el dataframe.
model1<-lm(price ~ income + pop + intrate + region, data= houses)
summary(model1)
##
## Call:
## lm(formula = price ~ income + pop + intrate + region, data = houses)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.501 -10.445 -0.603 8.589 92.005
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.749e+01 2.789e+00 27.782 < 2e-16 ***
## income 4.235e+00 2.519e-01 16.811 < 2e-16 ***
## pop 4.733e-07 7.938e-08 5.962 3.13e-09 ***
## intrate -1.862e+00 1.586e-01 -11.738 < 2e-16 ***
## region -3.222e+00 2.018e-01 -15.967 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.39 on 1416 degrees of freedom
## Multiple R-squared: 0.3889, Adjusted R-squared: 0.3871
## F-statistic: 225.3 on 4 and 1416 DF, p-value: < 2.2e-16
Debido a que los valores son menores a 5, se puede decir que la multicolinealidad no es un problema.
vif(model1)
## income pop intrate region
## 1.183783 1.113042 1.017722 1.071238
Se llevaron a cabo modelos de efectos fijos con todas las variables y es posible observar que los efectos de tiempo son importantes ya que es el modelo que tuvo mejor desempeño.
panel_model1<-plm(price ~ income + pop + intrate + region, data= houses, model="within",effect="twoways")
panel_model2<-plm(price ~ income + pop + intrate + region, data= houses, model="within",effect="individual")
panel_model3<-plm(price ~ income + pop + intrate + region, data= houses, model="within",effect="time") #mejor
summary(panel_model1)
## Twoways effects Within Model
##
## Call:
## plm(formula = price ~ income + pop + intrate + region, data = houses,
## effect = "twoways", model = "within")
##
## Balanced Panel: n = 49, T = 29, N = 1421
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -43.16967 -6.21685 -0.80249 5.92160 48.92844
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## income 1.2298e+01 6.3250e-01 19.4438 < 2.2e-16 ***
## pop 1.7281e-06 3.7314e-07 4.6314 3.986e-06 ***
## intrate -3.0574e+00 3.1695e-01 -9.6464 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 224730
## Residual Sum of Squares: 158980
## R-Squared: 0.29256
## Adj. R-Squared: 0.25088
## F-statistic: 184.855 on 3 and 1341 DF, p-value: < 2.22e-16
summary(panel_model2)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = price ~ income + pop + intrate + region, data = houses,
## effect = "individual", model = "within")
##
## Balanced Panel: n = 49, T = 29, N = 1421
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -47.9442 -7.8578 -1.5584 6.9834 59.9015
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## income 5.2096e+00 2.9520e-01 17.6480 <2e-16 ***
## pop 5.0925e-07 4.0803e-07 1.2481 0.2122
## intrate -1.8838e+00 1.2743e-01 -14.7838 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 287680
## Residual Sum of Squares: 204970
## R-Squared: 0.28751
## Adj. R-Squared: 0.26097
## F-statistic: 184.146 on 3 and 1369 DF, p-value: < 2.22e-16
summary(panel_model3)
## Oneway (time) effect Within Model
##
## Call:
## plm(formula = price ~ income + pop + intrate + region, data = houses,
## effect = "time", model = "within")
##
## Balanced Panel: n = 49, T = 29, N = 1421
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -40.5645 -10.2410 -0.3150 8.9653 79.4120
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## income 4.3061e+00 3.3365e-01 12.9062 < 2.2e-16 ***
## pop 5.0149e-07 7.8273e-08 6.4069 2.029e-10 ***
## intrate -3.9605e+00 4.2027e-01 -9.4238 < 2.2e-16 ***
## region -3.2921e+00 2.0095e-01 -16.3823 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 485710
## Residual Sum of Squares: 305970
## R-Squared: 0.37007
## Adj. R-Squared: 0.35554
## F-statistic: 203.851 on 4 and 1388 DF, p-value: < 2.22e-16
También se generaron modelos utilizando efectos aleatorios y OLS. A través de los mismos se puede observar que el de efectos fijos continua teniendo el mejor desempeño.
panel_model4<-plm(price ~ income + pop + intrate + region, data= houses, model="random")
summary(panel_model4)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = price ~ income + pop + intrate + region, data = houses,
## model = "random")
##
## Balanced Panel: n = 49, T = 29, N = 1421
##
## Effects:
## var std.dev share
## idiosyncratic 149.724 12.236 0.63
## individual 87.752 9.368 0.37
## theta: 0.7643
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -42.8450 -8.1036 -1.3078 6.9597 66.9573
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 6.8036e+01 4.1727e+00 16.3051 < 2.2e-16 ***
## income 5.1359e+00 2.7121e-01 18.9369 < 2.2e-16 ***
## pop 4.3882e-07 2.2261e-07 1.9713 0.04869 *
## intrate -1.8781e+00 1.2729e-01 -14.7537 < 2.2e-16 ***
## region -3.0488e+00 6.6207e-01 -4.6050 4.125e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 302190
## Residual Sum of Squares: 212410
## R-Squared: 0.29708
## Adj. R-Squared: 0.29509
## Chisq: 598.441 on 4 DF, p-value: < 2.22e-16
panel_model5<-plm(price ~ income + pop + intrate + region, data= houses, model="pooling")
summary(panel_model5)
## Pooling Model
##
## Call:
## plm(formula = price ~ income + pop + intrate + region, data = houses,
## model = "pooling")
##
## Balanced Panel: n = 49, T = 29, N = 1421
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -41.50070 -10.44536 -0.60276 8.58930 92.00531
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 7.7494e+01 2.7894e+00 27.7816 < 2.2e-16 ***
## income 4.2345e+00 2.5189e-01 16.8110 < 2.2e-16 ***
## pop 4.7329e-07 7.9379e-08 5.9624 3.134e-09 ***
## intrate -1.8618e+00 1.5862e-01 -11.7375 < 2.2e-16 ***
## region -3.2223e+00 2.0181e-01 -15.9672 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 548670
## Residual Sum of Squares: 335310
## R-Squared: 0.38887
## Adj. R-Squared: 0.38715
## F-statistic: 225.257 on 4 and 1416 DF, p-value: < 2.22e-16
Tests de diagnĂłstico Debido a que el valor de p es menor al 5% el modelo con efectos fijos es mejor que el OLS
pFtest(panel_model3, model1)
##
## F test for time effects
##
## data: price ~ income + pop + intrate + region
## F = 4.7538, df1 = 28, df2 = 1388, p-value = 8.678e-15
## alternative hypothesis: significant effects
Debido a que el valor de p es menor al 5% es recomendable considerar efectos fijos con factor de tiempo
pFtest(panel_model3, panel_model5)
##
## F test for time effects
##
## data: price ~ income + pop + intrate + region
## F = 4.7538, df1 = 28, df2 = 1388, p-value = 8.678e-15
## alternative hypothesis: significant effects
Debido a que el valor de p es menor a 5% es recomendable usar el modelo de efectos fijos sobre el de efectos aleatorios.
phtest(panel_model3, panel_model4)
##
## Hausman Test
##
## data: price ~ income + pop + intrate + region
## chisq = 37.719, df = 4, p-value = 1.281e-07
## alternative hypothesis: one model is inconsistent
Debido a que el valor de p es menor a 5%, los efectos aleatorios son recomendables sobre OLS
plmtest(panel_model5, type=c("bp"))
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: price ~ income + pop + intrate + region
## chisq = 2581.6, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
Debido a que en el test de Breusch Pagan el valor de p es menor al 5% hay heteroscedasticidad Debido a que los coeficientes tienen un valor menor a 0.05, son significativos Debido a que los coeficientes son menores a 0.05 se concluye que hay una relación entre las variables predictoras de ingresos, tasa de interés y región y la variable respuesta. Debido a que el de población no salió significativo la heteroscedasticidad puede estar afectando el error eståndar. Debido a que el valor de p en el test de Breusch-Godfrey/Wooldridge es menor a 0.05 se encuentra autocorrelación serial.
bptest(panel_model3)
##
## studentized Breusch-Pagan test
##
## data: panel_model3
## BP = 91.923, df = 4, p-value < 2.2e-16
coeftest(panel_model3)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## income 4.3061e+00 3.3365e-01 12.9062 < 2.2e-16 ***
## pop 5.0149e-07 7.8273e-08 6.4069 2.029e-10 ***
## intrate -3.9605e+00 4.2027e-01 -9.4238 < 2.2e-16 ***
## region -3.2921e+00 2.0095e-01 -16.3823 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(panel_model3, vcovHC)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## income 4.3061e+00 1.0531e+00 4.0889 4.583e-05 ***
## pop 5.0149e-07 2.6927e-07 1.8624 0.06276 .
## intrate -3.9605e+00 9.1453e-01 -4.3306 1.594e-05 ***
## region -3.2921e+00 7.2379e-01 -4.5484 5.877e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pbgtest(panel_model3)
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel models
##
## data: price ~ income + pop + intrate + region
## chisq = 1109.4, df = 29, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
Correción de modelos Se utiliza el método de Arellano para lidiar con la heteroscedasticidad y la correlación serial. Se obtienen nuevos coeficientes en los que la variable de población ya no es significativa.
coeftest(panel_model3, vcovHC(panel_model3, method = "arellano"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## income 4.3061e+00 1.0531e+00 4.0889 4.583e-05 ***
## pop 5.0149e-07 2.6927e-07 1.8624 0.06276 .
## intrate -3.9605e+00 9.1453e-01 -4.3306 1.594e-05 ***
## region -3.2921e+00 7.2379e-01 -4.5484 5.877e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coefficients_arellano <- c(income = 4.3061e+00, intrate = -3.9605e+00, region = -3.2921e+00)
modelo3_1 <- function(houses, coefficients) {
income <- houses$income
intrate <- houses$intrate
region <- houses$region
nuevo <- lm(price ~ income + intrate + region, data = houses)
return(nuevo)
}
modelo_final <- modelo3_1(houses,coefficients_arellano)
summary(modelo_final)
##
## Call:
## lm(formula = price ~ income + intrate + region, data = houses)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.497 -10.519 -0.533 8.779 91.388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.5434 2.7784 26.83 <2e-16 ***
## income 4.7010 0.2423 19.40 <2e-16 ***
## intrate -1.8568 0.1605 -11.57 <2e-16 ***
## region -3.0610 0.2024 -15.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.57 on 1417 degrees of freedom
## Multiple R-squared: 0.3735, Adjusted R-squared: 0.3722
## F-statistic: 281.6 on 3 and 1417 DF, p-value: < 2.2e-16
AIC(modelo_final)
## [1] 11841.76
Al comparar los coeficientes del modelo seleccionado (efectos fijos con efectos de tiempo) y con los coeficientes de Arellano que intentan lidiar con la heteroscedasticidad y con la autocorrelación serial del modelo, se puede observar que el modelo con los coeficientes de Arellano tiene mejor desempeño. Asimismo, es importante recordar que se eliminó la variable de población con los coeficientes de Arellano ya que resultó no ser una variable significativa. Ambos modelos tienen un buen desempeño, sin embargo, se obtuvo una mejor R cuadrada ajustada con los coeficientes de Arellano y sin considerar la variable de población. Esto lo hace un modelo eficiente y simple. El hecho de que los efectos fijos y efectos de tiempo tuvieran el mejor desempeño indica que las variables no cambian a grandes razgos a través del tiempo y que estas mismas estån correlacionadas con la variable dependiente. Al obtener una R cuadrada ajustada de 0.372 se puede decir que aproximadamente el 37.2% de la variabilidad de los precios de hogares en Estados Unidos es explicada por las variables independientes incluidas en el modelo.
stargazer(panel_model3, modelo_final, title="Panel Regression Analysis", type="text", column.labels = c("Original","Arellano"))
##
## Panel Regression Analysis
## =======================================================================
## Dependent variable:
## ---------------------------------------------------
## price
## panel OLS
## linear
## Original Arellano
## (1) (2)
## -----------------------------------------------------------------------
## income 4.306*** 4.701***
## (0.334) (0.242)
##
## pop 0.00000***
## (0.00000)
##
## intrate -3.961*** -1.857***
## (0.420) (0.161)
##
## region -3.292*** -3.061***
## (0.201) (0.202)
##
## Constant 74.543***
## (2.778)
##
## -----------------------------------------------------------------------
## Observations 1,421 1,421
## R2 0.370 0.374
## Adjusted R2 0.356 0.372
## Residual Std. Error 15.575 (df = 1417)
## F Statistic 203.851*** (df = 4; 1388) 281.625*** (df = 3; 1417)
## =======================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Es importante tomar en cuenta que el modelo cuenta con heteroscedasticidad, lo que indica que el modelo no es totalmente preciso.
bptest(modelo_final)
##
## studentized Breusch-Pagan test
##
## data: modelo_final
## BP = 94.916, df = 3, p-value < 2.2e-16
SECCIĂN II
Se puede observar que las acciones de FEMSA tienen un buen desempeño en general debido a que todos los valores son mayores a 0. Esto se puede observar a travĂ©s de los mĂnimos.
summary(femsa)
## Date Open High Low
## Length:365 Min. :113.7 Min. :119.9 Min. :112.7
## Class :character 1st Qu.:155.9 1st Qu.:159.8 1st Qu.:151.1
## Mode :character Median :170.3 Median :174.2 Median :166.8
## Mean :166.4 Mean :170.3 Mean :162.6
## 3rd Qu.:178.7 3rd Qu.:181.9 3rd Qu.:175.5
## Max. :226.7 Max. :228.1 Max. :221.4
## Close Adj.Close Volume
## Min. :113.7 Min. :107.1 Min. : 2483101
## 1st Qu.:154.8 1st Qu.:146.8 1st Qu.: 9359281
## Median :170.7 Median :157.4 Median :12271161
## Mean :166.4 Mean :156.4 Mean :13533991
## 3rd Qu.:178.8 3rd Qu.:165.8 3rd Qu.:16248080
## Max. :223.0 Max. :223.0 Max. :41518042
A travĂ©s de los histogramas se puede observar una distribuciĂłn relativamente normal. Es posible observar sesgos en la mayorĂa de las variables pero de cualquier manera, los datos se ven concentrados.
hist(femsa$Open)
hist(femsa$High)
hist(femsa$Low)
hist(femsa$Close)
hist(femsa$Adj.Close)
hist(femsa$Volume)
A través de los diagramas de caja se puede observar que los valores
estån concentrados ya que el rango IQR es pequeño. De cualquier manera,
se pueden observar valores atĂpicos en todas las variables.
boxplot(femsa$Open)
boxplot(femsa$High)
boxplot(femsa$Low)
boxplot(femsa$Close)
boxplot(femsa$Adj.Close)
boxplot(femsa$Volume)
femsa$Date<-as.Date(femsa$Date)
ts_open<-ts(femsa$Open, start = c(2017), end = c(2023), frequency = 52)
ts_plot(ts_open,line.mode = "lines", Xtitle = "Year", Ytitle = "$MXN", title = "Open")
ts_high<-ts(femsa$High, start = c(2017), end = c(2023), frequency = 52)
ts_plot(ts_high,line.mode = "lines", Xtitle = "Year", Ytitle = "$MXN", title = "High")
ts_low<-ts(femsa$Low, start = c(2017), end = c(2023), frequency = 52)
ts_plot(ts_low,line.mode = "lines", Xtitle = "Year", Ytitle = "$MXN", title = "Low")
ts_close<-ts(femsa$Close, start = c(2017), end = c(2023), frequency = 52)
ts_plot(ts_close,line.mode = "lines", Xtitle = "Year", Ytitle = "$MXN", title = "Close")
ts_adj_close<-ts(femsa$Adj.Close, start = c(2017), end = c(2023), frequency = 52)
ts_plot(ts_adj_close,line.mode = "lines", Xtitle = "Year", Ytitle = "$MXN", title = "Adj Close")
ts_volume<-ts(femsa$Volume, start = c(2017), end = c(2023), frequency = 52)
ts_plot(ts_volume,line.mode = "lines", Xtitle = "Year", Ytitle = "$MXN", title = "Volume")
adf.test(ts_open) #Debido a que el valor de p es mayor a 0.05 no es estacionaria.
##
## Augmented Dickey-Fuller Test
##
## data: ts_open
## Dickey-Fuller = -2.7561, Lag order = 6, p-value = 0.2576
## alternative hypothesis: stationary
acf(ts_open,main="Significant Autocorrelation") #Hay autocorrelaciĂłn serial
adf.test(ts_high) #Debido a que el valor de p es mayor a 0.05 no es estacionaria.
##
## Augmented Dickey-Fuller Test
##
## data: ts_high
## Dickey-Fuller = -2.9031, Lag order = 6, p-value = 0.1957
## alternative hypothesis: stationary
acf(ts_high,main="Significant Autocorrelation") #Hay autocorrelaciĂłn serial
adf.test(ts_low) #Debido a que el valor de p es mayor a 0.05 no es estacionaria.
##
## Augmented Dickey-Fuller Test
##
## data: ts_low
## Dickey-Fuller = -2.8939, Lag order = 6, p-value = 0.1995
## alternative hypothesis: stationary
acf(ts_low,main="Significant Autocorrelation") #Hay autocorrelaciĂłn serial
adf.test(ts_close) #Debido a que el valor de p es mayor a 0.05 no es estacionaria.
##
## Augmented Dickey-Fuller Test
##
## data: ts_close
## Dickey-Fuller = -2.7398, Lag order = 6, p-value = 0.2645
## alternative hypothesis: stationary
acf(ts_close,main="Significant Autocorrelation") #Hay autocorrelaciĂłn serial
adf.test(ts_adj_close) #Debido a que el valor de p es mayor a 0.05 no es estacionaria.
##
## Augmented Dickey-Fuller Test
##
## data: ts_adj_close
## Dickey-Fuller = -2.7065, Lag order = 6, p-value = 0.2786
## alternative hypothesis: stationary
acf(ts_high,main="Significant Autocorrelation") #Hay autocorrelaciĂłn serial
adf.test(ts_volume) #Debido a que el valor de p es menor a 0.05 es estacionaria.
## Warning in adf.test(ts_volume): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ts_volume
## Dickey-Fuller = -5.3174, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
acf(ts_volume,main="Significant Autocorrelation") #Hay autocorrelaciĂłn serial
A través de la descomposición de la serie de tiempo de adjusted price se puede observar un componente de estacionalidad y de aleatoriedad. Sin embargo, no se puede decir que hay una tendencia de crecimiento. En general, se pueden observar fluctuaciones a través del tiempo.
descpr.ts <- decompose(ts_adj_close)
plot(descpr.ts)
dec<-decompose(ts_adj_close)
dec$seasonal
## Time Series:
## Start = c(2017, 1)
## End = c(2023, 1)
## Frequency = 52
## [1] 4.0348029539 4.8417270635 4.9683294193 4.4635230347 -0.8778866192
## [6] -1.6708167884 0.1592405443 0.2740124904 -1.8779012423 1.3997419443
## [11] -0.9780809000 -2.7854474307 -3.0330279557 -0.7892001730 0.3809913943
## [16] -0.8494567096 -0.2420675769 2.0054329481 -0.3638124596 0.7160666154
## [21] 0.3596860904 -0.2029091673 4.3454091347 1.7143792404 0.8296320366
## [26] -0.6696314807 1.4819817747 1.4712845058 1.3045596289 -0.0002192192
## [31] 0.4745892904 0.0162143058 1.2670563135 -0.0367315076 1.5411875308
## [36] -0.6542197923 -1.5786618769 -0.9705394192 0.1966342193 0.2026716212
## [41] -0.6875401538 -2.2688479923 -3.6899227576 -8.1490215000 -4.6434535788
## [46] -1.4606627576 -0.7378268288 -0.7462711057 -1.1535684538 0.1100357943
## [51] 0.9771798962 1.5813556558 4.0348029539 4.8417270635 4.9683294193
## [56] 4.4635230347 -0.8778866192 -1.6708167884 0.1592405443 0.2740124904
## [61] -1.8779012423 1.3997419443 -0.9780809000 -2.7854474307 -3.0330279557
## [66] -0.7892001730 0.3809913943 -0.8494567096 -0.2420675769 2.0054329481
## [71] -0.3638124596 0.7160666154 0.3596860904 -0.2029091673 4.3454091347
## [76] 1.7143792404 0.8296320366 -0.6696314807 1.4819817747 1.4712845058
## [81] 1.3045596289 -0.0002192192 0.4745892904 0.0162143058 1.2670563135
## [86] -0.0367315076 1.5411875308 -0.6542197923 -1.5786618769 -0.9705394192
## [91] 0.1966342193 0.2026716212 -0.6875401538 -2.2688479923 -3.6899227576
## [96] -8.1490215000 -4.6434535788 -1.4606627576 -0.7378268288 -0.7462711057
## [101] -1.1535684538 0.1100357943 0.9771798962 1.5813556558 4.0348029539
## [106] 4.8417270635 4.9683294193 4.4635230347 -0.8778866192 -1.6708167884
## [111] 0.1592405443 0.2740124904 -1.8779012423 1.3997419443 -0.9780809000
## [116] -2.7854474307 -3.0330279557 -0.7892001730 0.3809913943 -0.8494567096
## [121] -0.2420675769 2.0054329481 -0.3638124596 0.7160666154 0.3596860904
## [126] -0.2029091673 4.3454091347 1.7143792404 0.8296320366 -0.6696314807
## [131] 1.4819817747 1.4712845058 1.3045596289 -0.0002192192 0.4745892904
## [136] 0.0162143058 1.2670563135 -0.0367315076 1.5411875308 -0.6542197923
## [141] -1.5786618769 -0.9705394192 0.1966342193 0.2026716212 -0.6875401538
## [146] -2.2688479923 -3.6899227576 -8.1490215000 -4.6434535788 -1.4606627576
## [151] -0.7378268288 -0.7462711057 -1.1535684538 0.1100357943 0.9771798962
## [156] 1.5813556558 4.0348029539 4.8417270635 4.9683294193 4.4635230347
## [161] -0.8778866192 -1.6708167884 0.1592405443 0.2740124904 -1.8779012423
## [166] 1.3997419443 -0.9780809000 -2.7854474307 -3.0330279557 -0.7892001730
## [171] 0.3809913943 -0.8494567096 -0.2420675769 2.0054329481 -0.3638124596
## [176] 0.7160666154 0.3596860904 -0.2029091673 4.3454091347 1.7143792404
## [181] 0.8296320366 -0.6696314807 1.4819817747 1.4712845058 1.3045596289
## [186] -0.0002192192 0.4745892904 0.0162143058 1.2670563135 -0.0367315076
## [191] 1.5411875308 -0.6542197923 -1.5786618769 -0.9705394192 0.1966342193
## [196] 0.2026716212 -0.6875401538 -2.2688479923 -3.6899227576 -8.1490215000
## [201] -4.6434535788 -1.4606627576 -0.7378268288 -0.7462711057 -1.1535684538
## [206] 0.1100357943 0.9771798962 1.5813556558 4.0348029539 4.8417270635
## [211] 4.9683294193 4.4635230347 -0.8778866192 -1.6708167884 0.1592405443
## [216] 0.2740124904 -1.8779012423 1.3997419443 -0.9780809000 -2.7854474307
## [221] -3.0330279557 -0.7892001730 0.3809913943 -0.8494567096 -0.2420675769
## [226] 2.0054329481 -0.3638124596 0.7160666154 0.3596860904 -0.2029091673
## [231] 4.3454091347 1.7143792404 0.8296320366 -0.6696314807 1.4819817747
## [236] 1.4712845058 1.3045596289 -0.0002192192 0.4745892904 0.0162143058
## [241] 1.2670563135 -0.0367315076 1.5411875308 -0.6542197923 -1.5786618769
## [246] -0.9705394192 0.1966342193 0.2026716212 -0.6875401538 -2.2688479923
## [251] -3.6899227576 -8.1490215000 -4.6434535788 -1.4606627576 -0.7378268288
## [256] -0.7462711057 -1.1535684538 0.1100357943 0.9771798962 1.5813556558
## [261] 4.0348029539 4.8417270635 4.9683294193 4.4635230347 -0.8778866192
## [266] -1.6708167884 0.1592405443 0.2740124904 -1.8779012423 1.3997419443
## [271] -0.9780809000 -2.7854474307 -3.0330279557 -0.7892001730 0.3809913943
## [276] -0.8494567096 -0.2420675769 2.0054329481 -0.3638124596 0.7160666154
## [281] 0.3596860904 -0.2029091673 4.3454091347 1.7143792404 0.8296320366
## [286] -0.6696314807 1.4819817747 1.4712845058 1.3045596289 -0.0002192192
## [291] 0.4745892904 0.0162143058 1.2670563135 -0.0367315076 1.5411875308
## [296] -0.6542197923 -1.5786618769 -0.9705394192 0.1966342193 0.2026716212
## [301] -0.6875401538 -2.2688479923 -3.6899227576 -8.1490215000 -4.6434535788
## [306] -1.4606627576 -0.7378268288 -0.7462711057 -1.1535684538 0.1100357943
## [311] 0.9771798962 1.5813556558 4.0348029539
plot(dec$seasonal)
Al intentar construir modelos ARMA primero se buscarĂĄ hacerlo con el
logaritmo de la serie de tiempo no estacionaria. La transformaciĂłn
logarĂtmica no fue suficiente e incluso se puede observar un AIC
negativo.
#AIC negativo
modelo0<-auto.arima(log(ts_adj_close),stationary=T,seasonal=T,stepwise=T)
summary(modelo0)
## Series: log(ts_adj_close)
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.9522 5.013
## s.e. 0.0164 0.034
##
## sigma^2 = 0.0009359: log likelihood = 647.13
## AIC=-1288.25 AICc=-1288.17 BIC=-1277.01
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0003316132 0.03049417 0.02181572 0.002746035 0.4379021 0.1640419
## ACF1
## Training set -0.04451921
ndiffs(log(ts_adj_close))
## [1] 1
ts_adj_diff=diff(log(ts_adj_close),differences=1)
Al aplicar las diferencias para poder construir un modelo ARMA se vuelve a observar que la serie de tiempo tiene un AIC negativo. Esto indica que el modelo no es bueno ya que los valores pueden estar overfitted.
#AIC negativo
modelo0_1<-auto.arima(ts_adj_diff,stationary=T,seasonal=T,stepwise=T)
summary(modelo0_1)
## Series: ts_adj_diff
## ARIMA(0,0,0) with zero mean
##
## sigma^2 = 0.000954: log likelihood = 642.26
## AIC=-1282.51 AICc=-1282.5 BIC=-1278.77
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0001781316 0.03088612 0.02212478 100 100 0.6591839 -0.06952365
Se busca hacer un modelo ARIMA en el que la serie de tiempo se vuelva estacionaria. Como se puede observar, se incluye un diferenciador. Asimismo, se indica que la serie tiene un componente estacional, como fue visto en la descomposiciĂłn de la serie de tiempo.
modelo1<-auto.arima(ts_adj_close,stationary=F,seasonal=T,stepwise=T)
summary(modelo1)
## Series: ts_adj_close
## ARIMA(0,1,0)
##
## sigma^2 = 19.44: log likelihood = -905.61
## AIC=1813.22 AICc=1813.23 BIC=1816.96
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0262102 4.402009 3.246531 -0.02933105 2.204892 0.1660135
## ACF1
## Training set -0.07759413
Se busca hacer el modelo sin el componente estacional y se encuentran los mismos resultados que en el modelo anterior.
modelo2<-auto.arima(ts_adj_close,stationary=F,seasonal=F,stepwise=T)
summary(modelo2)
## Series: ts_adj_close
## ARIMA(0,1,0)
##
## sigma^2 = 19.44: log likelihood = -905.61
## AIC=1813.22 AICc=1813.23 BIC=1816.96
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0262102 4.402009 3.246531 -0.02933105 2.204892 0.1660135
## ACF1
## Training set -0.07759413
auto.arima(ts_adj_close, trace=T)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,1,2)(1,0,1)[52] with drift : 1854.148
## ARIMA(0,1,0) with drift : 1812.282
## ARIMA(1,1,0)(1,0,0)[52] with drift : 1849.155
## ARIMA(0,1,1)(0,0,1)[52] with drift : 1814.061
## ARIMA(0,1,0) : 1810.267
## ARIMA(0,1,0)(1,0,0)[52] with drift : 1847.536
## ARIMA(0,1,0)(0,0,1)[52] with drift : 1814.032
## ARIMA(0,1,0)(1,0,1)[52] with drift : 1846.692
## ARIMA(1,1,0) with drift : 1813.274
## ARIMA(0,1,1) with drift : 1812.466
## ARIMA(1,1,1) with drift : 1815.14
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(0,1,0) : 1813.234
##
## Best model: ARIMA(0,1,0)
## Series: ts_adj_close
## ARIMA(0,1,0)
##
## sigma^2 = 19.44: log likelihood = -905.61
## AIC=1813.22 AICc=1813.23 BIC=1816.96
Se utilizan los modelos con mejor desempeño de la función anterior y se encuentra un componente de diferenciación y uno de medias móviles.
modelo3 <- arima(ts_adj_close, order = c(0,1,1))
summary(modelo3)
##
## Call:
## arima(x = ts_adj_close, order = c(0, 1, 1))
##
## Coefficients:
## ma1
## -0.0763
## s.e. 0.0560
##
## sigma^2 estimated as 19.32: log likelihood = -904.69, aic = 1813.37
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02882179 4.38895 3.21987 -0.03086032 2.185639 0.9887563
## ACF1
## Training set -0.0009167525
Se busca comparar con otro modelo para poder seleccionar el de mejor desempeño. Tomando en cuenta las medidas de RMSE y AIC. Considerando esos valores el mejor modelo serĂa uno con un componente autorregresivo y uno de diferenciaciĂłn. Es decir, un modelo ARIMA(1,1,0).
modelo4 <- arima(ts_adj_close, order = c(1,1,0))
summary(modelo4)
##
## Call:
## arima(x = ts_adj_close, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## -0.0774
## s.e. 0.0564
##
## sigma^2 estimated as 19.32: log likelihood = -904.67, aic = 1813.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02873053 4.388722 3.21953 -0.03070237 2.185218 0.9886519
## ACF1
## Training set 0.0004438761
El modelo tiene un valor de p mayor a 0.05 en la prueba de Box-Ljung, lo que indica que los residuales son independientes, por lo que se estĂĄ trabajando con un buen modelo ya que tiene ruido blanco.
tsdiag(modelo4)
Box.test(residuals(modelo4), type= "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(modelo4)
## X-squared = 6.2262e-05, df = 1, p-value = 0.9937
error=residuals(modelo4)
plot(error)
Se puede observar que los residuales del modelo son estacionarios, lo cual es bueno porque indica mejores precisiones.
residuos <- residuals(modelo4)
adf.test(residuos)
## Warning in adf.test(residuos): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuos
## Dickey-Fuller = -7.103, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
pronostico <- forecast::forecast(modelo4, h=10)
plot(pronostico)
femsa2024$Date<-as.Date(femsa2024$Date)
femsa2024
## Date Open High Low Close Adj.Close Volume
## 1 2024-01-01 221.46 223.00 211.56 213.45 213.45 5479142
## 2 2024-01-08 214.65 220.26 214.23 219.20 219.20 7690391
## 3 2024-01-15 218.00 229.07 217.60 228.25 228.25 13469639
## 4 2024-01-22 228.00 237.61 227.03 233.85 233.85 9995229
## 5 2024-01-29 233.58 243.90 232.34 241.52 241.52 8962110
## 6 2024-02-05 242.00 244.94 236.00 242.58 242.58 13399393
## 7 2024-02-12 242.28 245.00 224.99 227.51 227.51 7333646
## 8 2024-02-19 229.92 229.92 200.66 203.40 203.40 18807624
## 9 2024-02-26 204.00 215.28 200.00 212.28 212.28 25730330
## 10 2024-03-04 210.26 213.06 209.56 210.22 210.22 3081163
df<- data.frame(date=femsa2024$Date, pronostico=pronostico$mean, real=femsa2024$Adj.Close)
df
## date pronostico real
## 1 2024-01-01 149.3112 213.45
## 2 2024-01-08 149.2980 219.20
## 3 2024-01-15 149.2990 228.25
## 4 2024-01-22 149.2990 233.85
## 5 2024-01-29 149.2990 241.52
## 6 2024-02-05 149.2990 242.58
## 7 2024-02-12 149.2990 227.51
## 8 2024-02-19 149.2990 203.40
## 9 2024-02-26 149.2990 212.28
## 10 2024-03-04 149.2990 210.22
Como se puede observar, el pronóstico del ARIMA no fue preciso. En realidad los valores del pronóstico se mantuvieron bajos y constantes mientras que en realidad hubo un incremento y después un declive en los precios de las acciones.
plot_data <- ggplot(femsa, aes(x = Date, y = Adj.Close)) +
geom_line(color = "blue") +
labs(x = "Date", y = "Adjusted Close", title = "Original Data until 2023") +
theme_minimal()
plot_data <- plot_data +
geom_line(data = femsa2024, aes(x = Date, y = Adj.Close), color = "green") +
geom_line(data = df, aes(x = date, y = pronostico), color = "red") +
labs(color = "Lines", title = "Original Data until 2023 with New Values from 2024 and ARIMA Forecast") +
scale_color_manual(values = c("blue", "green", "red"))
print(plot_data)